#coding=utf-8
import redis
from lib.tag import *
from lib.config import *
from lib.thing import ThreadPool
import multiprocessing,threading
import sys
# Global socket timeout applied to every urllib2 fetch in this module.
# ``socket`` and ``settings`` are not imported by name in this file --
# presumably re-exported by the ``from lib.* import *`` lines above; TODO confirm.
socket.setdefaulttimeout(settings.timeout)
# NOTE(review): this module-level ``tt`` appears unused -- doupdatepage()
# binds its own local ``tt``.
tt = []
# Active crawl-rule dict for the current worker; populated by doupdatepage()
# and read by fetchpagethreads()/replacebody()/recontent().
info = {}
def replacebody(strbody):
    """Apply the rule's 'rebody' (old, new) replacement pairs to *strbody*.

    Reads the module-global ``info`` rule dict; when no 'rebody' key is
    configured the string is returned unchanged.
    """
    # dict.has_key is deprecated (removed in Python 3); .get with an empty
    # default handles the missing-key case in a single lookup.
    for pair in info.get('rebody', ()):
        strbody = strbody.replace(pair[0], pair[1])
    return strbody
def recontent(strcontent):
    """Apply the rule's 'recontent' (old, new) replacement pairs to *strcontent*.

    Reads the module-global ``info`` rule dict; when no 'recontent' key is
    configured the string is returned unchanged.
    """
    # dict.has_key is deprecated (removed in Python 3); .get with an empty
    # default handles the missing-key case in a single lookup.
    for pair in info.get('recontent', ()):
        strcontent = strcontent.replace(pair[0], pair[1])
    return strcontent
def fetchpagethreads(id,name,tid,ttype,content,conn,t=False,p=None,u=0,li=0):
    """Crawl the comment pages of one thread and store new rows in ti_threads.

    Behaviour is driven by the module-global rule dict ``info`` (tag
    expressions, URL templates, charset, throttle interval, ...) that
    doupdatepage() fills in before calling here.

    id      -- identifier pulled out of the thread URL; substituted into
               info['vpage'] to build sub-page URLs
    name    -- thread author name (stored with each inserted row)
    tid     -- primary key of the ti_thread row being refreshed
    ttype   -- secondary type id written to ti_threads.tid2
    content -- HTML of the page the caller already downloaded
    conn    -- open MySQLdb connection; committed at the end
    t       -- True on the recursive per-sub-page call, which disables the
               pagination scan below
    p       -- URL that ``content`` came from (logged and stored as purl)
    u       -- pages already handled on a previous run (resume offset)
    li      -- comment rows already stored on a previous run (resume offset)

    NOTE(review): several info[...] entries are eval()'d below and may
    reference locals of this function (e.g. ``content``) -- renaming locals
    would silently break rule files, and eval over fetched content is a
    code-execution risk if a page or rule is hostile.
    """
    # First (non-recursive) call only: if the "more pages exist" marker is
    # present in the HTML, walk every numbered sub-page and recurse once per page.
    if t is False and content.find(info['endpagetag']) != -1:
        try:
            # Rule expression that extracts the page count from ``content``.
            pagecount = int(eval(info['pagecounttag']))
        except Exception:
            pagecount = 0
        step =info['step'] if info.has_key('step') else settings.step
        startvpage = info['startvpage'] if info.has_key('startvpage') else settings.startvpage
        # Widen by one step so range() reaches the last real page.
        pagecount = int(pagecount+step)
        au = 0
        for i in range(int(startvpage),pagecount,int(step)):
            # Skip pages a previous run already processed.
            # NOTE(review): the ``continue`` runs before ``au`` is
            # incremented, so with u > 0 the counter never advances and every
            # page is skipped (au is then stored back as 0 below) -- verify.
            if au < u: continue
            au +=1
            if(info.has_key('pagepid')):
                # Rewrite ``id`` via (old, new) pairs before formatting.
                # NOTE(review): this loop variable shadows the ``u`` parameter
                # tested just above.
                for u in info['pagepid']:
                    id = id.replace(u[0],u[1])
                p = info['vpage'] % (id % i)
            else:
                p = info['vpage'] % (id,i)
            logmsg('%s(%s/%s)'%(p,i,pagecount-step),info['f'])
            try:
                # Throttle, then fetch the sub-page in the rule's charset.
                time.sleep(info['time_s'] if info.has_key('time_s') else settings.time_s)
                content = urllib2.urlopen(p).read().decode(info['pagechatset'],'ignore')
            except urllib2.URLError:
                # Retry once through the vpageurl() helper on network errors.
                time.sleep(info['time_s'] if info.has_key('time_s') else settings.time_s)
                content = vpageurl(p,info['f'],info['pagechatset'])
            except Exception:
                time.sleep(info['time_s'] if info.has_key('time_s') else settings.time_s)
                content = vpageurl(p,info['f'],info['pagechatset'])
            # vpageurl() appears to signal failure with -1; skip that page.
            if content == -1:
                continue
            fetchpagethreads(id,name,tid,ttype,content,conn,True,p)
        c = conn.cursor()
        # Persist the number of pages handled as the next run's resume offset.
        c.execute("""update ti_thread set `u`=%s  where id=%s"""%(au,tid))
        c.close()
    # Extract commenter name/time/body lists, either with lxml xpath rules
    # ('commenttag') or with raw eval()'d expressions (else branch).
    if(info.has_key('commenttag')):
        try:
            r = HTML.document_fromstring(recontent(content))
        except ValueError:
            logmsg('vpage content失败：%s'%p,info['f'])
            return None
        # commenttag[0]: xpath yielding one node per comment;
        # commenttag[1]: dict mapping 'name'/'time'/'body' to sub-element paths.
        node = r.xpath(info['commenttag'][0])
        names = []
        times = []
        commentbody = []
        for i in node:
            for j in info['commenttag'][1]:
                nntag = i.find(info['commenttag'][1][j])
                if nntag is None:
                    # Keep the three lists index-aligned with placeholders.
                    if(j == 'name'):
                        names.append('')
                    elif (j =='time'):
                        times.append('')
                    elif (j =='body'):
                        commentbody.append(' ')
                    continue
                if (j == 'body'):
                    try:
                        # Body = element text plus serialized child markup.
                        data = '%s'%nntag.text + ''.join([HTML.tostring(y,encoding=info['pagechatset']).strip() for y in nntag.getchildren()]).decode(info['pagechatset'])
                    except TypeError:
                        data = nntag.text
                    except UnicodeDecodeError:
                        data = nntag.text
                    except AttributeError:
                        data = ' '
                else:
                    data = nntag.text
                if(j == 'name'):
                    # Truncate names to 30 chars (DB column width, presumably).
                    data = data if len(data) < 30 else data[0:30]
                    names.append(data)
                elif (j =='time'):
                    times.append(str(data).strip())
                elif (j =='body'):
                    commentbody.append(data)
    else:
        names = []
        if(info.has_key('commentnametag')):
            names = eval(info['commentnametag'])
        times = eval(info['commenttimestag'])
        commentbody = eval(info['commentbody'])
    # Insert each extracted comment, skipping rows stored on earlier runs.
    j = 0
    c = conn.cursor()
    for i in commentbody:
        # NOTE(review): ``continue`` fires before ``j`` is incremented, so
        # with li >= 0 the counter never advances past zero; the None-skip
        # below also desynchronizes j from the names/times lists -- verify.
        if j <= li: continue
        if i is None: continue
        if len(times) > 0:
            try:
                dateline = time.mktime(time.strptime(deltag(times[j]),info['dateformat'] if info.has_key('dateformat') else settings.dateformat))
            except ValueError:
                dateline = 0
        else:
            dateline = 0
        if info.has_key('commentnametag') or len(names)>0:
            try:
                sname = names[j]
            except ValueError:
                # NOTE(review): an out-of-range names[j] raises IndexError,
                # not ValueError, so this 'admin' fallback can never trigger.
                sname = 'admin'
        else:
            sname = ''
        j+=1

        try:
            # Rules can restrict storage to comments by the thread author.
            if info.has_key('onlyname'):
                if name != sname:
                    continue
            # NOTE(review): ``sname`` is computed above but ``name`` (the
            # thread author) is what gets inserted -- confirm this is intended.
            c.execute("insert into ti_threads (`aid`,`dateline`,`body`,`tid`,`tid2`,`name`)values(%s,%s,%s,%s,%s,%s)",
                (tid,dateline,dcode(deltag(replacebody(i))),info['typen'],ttype,dcode(name))
            )
        except Exception as e:
            print 'threads error %s'%e
            pass
        # u is the page offset, orde is the current row offset
    # Record the latest page URL, row count and refresh time on the thread row.
    # NOTE(review): string-formatted SQL -- ``p`` is interpolated unescaped.
    c.execute("""update ti_thread set `purl`='%s' ,`orde`=%s,`pubdate`=%s where id=%s"""%(p,len(commentbody),int(time.time()),tid))
    conn.commit()
    c.close()
def doupdatepage(i):
    """Worker entry point: refresh every stale thread for one crawl rule.

    i -- one rule dict from settings.ding_rule; copied into the module-global
         ``info`` that fetchpagethreads()/replacebody()/recontent() read.

    Selects ti_thread rows of this rule's type that are visible, have a
    stored page URL and were last published more than 7 days ago, re-fetches
    each one's (paged) URL and hands the HTML to fetchpagethreads().

    NOTE(review): ``info`` is a module global, yet doupdate() starts several
    of these workers as threads -- concurrent workers would overwrite each
    other's rule dict. Confirm whether runs are effectively serialized.
    """
    global info
    info = {}
    # Pick up any edits to the settings module since interpreter start.
    reload(settings)
    for j in i:
        info[j] = i[j]
    # NOTE(review): this redis connection is created but never used here.
    r = redis.Redis(host=settings.redisinfo[0], port=settings.redisinfo[1], db=settings.redisinfo[2])
    conn = MySQLdb.Connect(host=settings.mysqlinfo[0],user=settings.mysqlinfo[1],passwd=settings.mysqlinfo[2],db=settings.mysqlinfo[3],charset=settings.mysqlinfo[4],port=settings.mysqlinfo[5])
    c = conn.cursor()
    tid = info['typen']
    # u is the number of pages processed; orde is the row count of the page
    c.execute("select id,url,purl,u,orde,name from ti_thread where tid = %s and `show`=1 and `purl` != '' and `pubdate` < ( UNIX_TIMESTAMP( ) -86400 *7 ) ",(tid))
    tt = c.fetchall()
    # NOTE(review): this loop variable ``i`` shadows the rule-dict parameter.
    for i in tt:
        # Prefer the stored paged URL; fall back to the original thread URL
        # (the 'None' comparison suggests purl may hold the literal string).
        page = str(i[2] if i[2] != 'None' else i[1])
        try:
            # Recover the thread id by turning the rule's URL template into a
            # regex: escape '?', replace '%s' placeholders with capture groups.
            id = re.compile('%s'%info['page'].replace('?','\?').replace('%s','(.+?)'),re.DOTALL).findall(page)[0]
        except IndexError:
            # The stored URL may match the paged 'vpage' template instead.
            id = re.compile('%s'%info['vpage'].replace('?','\?').replace('%s','(.+?)'),re.DOTALL).findall(page)[0][0]
        try:
            content = urllib2.urlopen(page).read().decode(info['pagechatset'],'ignore')
            time.sleep(info['time_s'] if info.has_key('time_s') else settings.time_s)
        except urllib2.URLError:
            logmsg('vpage error 1:%s' % page,info['f'])
            continue
        except Exception:
            logmsg('vpage error 2:%s' % page,info['f'])
            continue
        # Resume offsets and author name come from the DB row.
        u = i[3]
        li = i[4]
        name = i[5]
        fetchpagethreads(id,name,tid,0,content,conn,False,None,u,li)
    pass
def doupdate():
    if len(sys.argv) > 1:
        if sys.argv[1] =='-s':
            for i in settings.ding_rule:
                if int(sys.argv[2]) == i['sid']:
                    print i['f'],u'更新开始',time.strftime('%Y-%m-%d %H:%M:%S',time.gmtime(time.time() + 8*60*60))
                    threading.Thread(target=doupdatepage,args=(i,)).start()
        elif sys.argv[1] =='-m':
            for i in settings.ding_rule:
                print i['f'],u'更新开始',time.strftime('%Y-%m-%d %H:%M:%S',time.gmtime(time.time() + 8*60*60))
                threading.Thread(target=doupdatepage,args=(i,)).start()
    else:
        for i in settings.ding_rule:
            print i['f'],u'更新开始',time.strftime('%Y-%m-%d %H:%M:%S',time.gmtime(time.time() + 8*60*60))
            threading.Thread(target=doupdatepage,args=(i,)).start()
    pass
if __name__=='__main__':
    if len(sys.argv) > 1:
        if sys.argv[1] == '-m' and sys.argv[2] == 'sohu':
            import settings_sohu as settings
        elif sys.argv[1] == '-h':
            print '--------help information----------'
            print '      -s : this is ding_rule sid example: run.py -s 152'
            print '      -m : this is module ding_rule example: runpy -m sohu'
            exit()
    doupdate()